************************************************************************
* Date: October 26, 2017
* David Simon
* Code for matching Schools to Road


/*
1. For each school, calculate shortest distance between each school and  each 
road.    
2. Assign bearing  between road and school
3. merge on Mattis data matched to school with wind direction.  
*/
************************************************************************



set more off
cap log close


	
**************Globals********************
	global home 

	global winddata 
	global roaddata 
	global schooldata 
	global output 
	global samples 


log using "$output\allsclmjrd.log", replace


**********start by loading in school data and matching to closest road**************

use "$schooldata\NCES_2010.dta", replace

*Now find nearest road based on lat and long;

preserve


import delimited "$roaddata\us_roads\usroads_lat_coords.csv", clear  /* this is the interstate data, with lat and long */



rename (_id x y) (_ID longitude latitude)
	gen UShigh=1
	label var UShigh "a US highway"
	
	
merge m:1 _ID using "$roaddata\us_roads\usroads.dta"

drop _merge

save "$roaddata\us_roads\usroads_smp.dta", replace


gen id=_n



***************Create line segments***********************************	
	***Getting prior segment piece's lat/long, if it's from the same roadway	- JAH
foreach var of varlist latitude longitude {
	gen `var'_prior=`var'[_n-1]
	replace `var'_prior=. if `var'==0
	replace `var'_prior=. if `var'_prior==0
	}

	
***getting segments (will be one less than the number of end points (e.g., if you have 3 points, you have 2 line segments) - JAH	
	geodist latitude longitude latitude_prior longitude_prior, generate(segmentdistance) miles
	label var segmentdistance "Road segment distance in miles"
	sum segmentdistance, d
	*hist segmentdistance if segmentdistance>0.1, freq
	
***AND NOW: DIVIDING UP SEGMENTS >0.1: For ordering purposes:
	gen n=_n

	gen segexp=floor(segmentdistance*100)+1  /**If a segment is longer than 0.01 mi,
	round up to the nearest 10th, multiply by 10, and divide it into that number of segments
	Change X in segmentdistance*X to make the minimum distance 1/X miles */
		
	expand segexp
	sort n
	*get a count by segment:
	by n: generate segord=_n

	**Replacing:
	foreach var of varlist latitude longitude {
		gen `var'_original=`var'
		*take the previous latitude, add on the extra fraction of the distance to get to the next value in the data 
		*(or each segment, so multiple by order 1-2-3...)
		gen dist`var' = (`var'-`var'_prior)/segexp if segexp>1
		replace `var'=`var'_prior+segord*(`var'-`var'_prior)/segexp if segexp>1
		*useful for checking: 
		order `var' `var'_original `var'_prior segexp dist`var' segord segmentdistance n
												}	
*DES: something to think about: precision of storage in stata: I think this is OK though
												
	drop id distlatitude distlongitude
	gen id=_n
	order latitude longitude segmentdistance UShigh ROADWAY BEGIN_POST END_POST Shape_Leng RANK ROUTE RouteNum id


	**Chceck 1:  Verify no more long segments:
	***Getting prior segment piece's lat/long, if it's from the same roadway	- JAH
	*foreach var of varlist latitude longitude {
	*	gen `var'_prior_2=`var'[_n-1]
	*	replace `var'_prior_2=. if `var'==0
	*	replace `var'_prior_2=. if `var'_prior==0
	*	}
	***getting segments (will be one less than the number of end points (e.g., if you have 3 points, you have 2 line segments) - JAH	
	*geodist latitude longitude latitude_prior_2 longitude_prior_2, generate(segmentdistance_2) miles
	*	label var segmentdistance "Road segment distance in miles"
	*	sum segmentdistance_2, d
	*	hist segmentdistance_2 if segmentdistance_2>0.1, freq
	***saving this new data as a temporary file:
	tempfile USrd
	save "`USrd'"	
	
restore


********************now create calculate nearest road to school***************


**2. For US highways/State Routes:
geonear ncessch latcod loncod using "`USrd'", neighbors(id latitude longitude) miles //this is where mi_to_nid is made

*merge road data - must set to local drive
rename nid id

merge m:1 id using `USrd'
tab _merge

drop if _merge==2
drop _merge


rename latitude road_lat
rename longitude road_lon
rename id road_id



rename ROUTE routename
rename ROADWAY roadway 

keep road_lat road_lon latcod loncod ncessch mi_to_nid roadway routename RouteNum 

**************Now try to calculate initial bearing of school to road  *******************
* check this against this formula:  http://www.movable-type.co.uk/scripts/latlong.html



	g lat1=latcod*_pi/180
	g lat2=road_lat*_pi/180
	g lon1=loncod*_pi/180
	g lon2=road_lon*_pi/180
//=ATAN2(COS(lat1)*SIN(lat2)-SIN(lat1)*COS(lat2)*COS(lon2-lon1), SIN(lon2-lon1)*COS(lat2)) - from http://www.movable-type.co.uk/scripts/latlong.html
	g theta=atan2(sin(lon2-lon1)*cos(lat2), cos(lat1)*sin(lat2)-sin(lat1)*cos(lat2)*cos(lon2-lon1)) 
	gen degrees = (180*theta)/_pi
	g bearing=mod((degrees+360), 360)






*reality check: this gives us a bearing between 1 and 360
	sum bearing

	rename bearing angle_degrees



*now I found at least two online calculators to apply the formula to
* http://www.movable-type.co.uk/scripts/latlong.html
* and: https://planetcalc.com/7042/
* and if you want one that does a map with it:  https://www.mobilefish.com/services/distance_calculator/distance_calculator.php

sum mi_to_nid
sum mi_to_nid if mi_to_nid<=1
keep if mi_to_nid<=1
	list angle_degrees latcod loncod road_lat road_lon in 1/10
	
	
save "$samples\schooltoUSH.dta", replace
